import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import cv2
from skimage.feature import hog
import time
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from moviepy.editor import VideoFileClip
import queue
from scipy.ndimage import label
import random
import glob
import os
vehicles_dir = os.path.join('..', 'data', 'vehicles')
non_vehicles_dir = os.path.join('..', 'data', 'non-vehicles')
# images are divided up into vehicles and non-vehicles
cars = []
notcars = []
# Read vehicle images
images = glob.iglob(vehicles_dir + '/*/*.png', recursive=True)
for image in images:
cars.append(image)
# Read non-vehicle images
images = glob.iglob(non_vehicles_dir + '/*/*.png', recursive=True)
for image in images:
notcars.append(image)
def visualize_images(input_images, num_cols, figure_name, cmap = None):
#Shows input images by stacking them in num_cols columns
fig, axes = plt.subplots((int)((len(input_images) + 1) /num_cols), num_cols, figsize=(24, 20))
fig = plt.gcf()
fig.canvas.set_window_title(figure_name)
print(figure_name)
for ax, image in zip(axes.flat, input_images):
if(cmap == "gray" or cmap == 'hot'):
ax.imshow(image, cmap=cmap)
elif(image.shape[2]==1):
ax.imshow(image[:,:,0], cmap = cmap)
else:
ax.imshow(image, cmap=cmap)
plt.show()
#Visualize some input images to test the dataset
#choosing random samples from cars and non-cars images
num_images = 10
cars_samples = random.sample(list(cars), num_images)
notcar_samples = random.sample(list(notcars), num_images)
# Read in car / not-car images
car_images = []
notcar_images = []
for sample in cars_samples:
car_images.append(mpimg.imread(sample))
for sample in notcar_samples:
notcar_images.append(mpimg.imread(sample))
visualize_images(car_images, num_images, "Example Car images")
visualize_images(notcar_images, num_images, "Example not-car images")
#Functions to generate more images
def translate_image(image, pos_range=2):
"""Perturbs the image by translating the image by - pos_range to + pos_range pixels"""
rows, cols = image.shape[:2]
pos_x = pos_range * np.random.uniform() - pos_range/2
pos_y = pos_range * np.random.uniform() - pos_range/2
pos_M = np.float32([[1, 0, pos_x],[0, 1, pos_y]])
return cv2.warpAffine(image, pos_M, (cols,rows))
#Scaling Image to be bigger accordinng to the scale size
def scale_image(image, scale = [1.0, 1.1, 1.2]):
scale_M = random.sample(scale, 1)[0]
image = cv2.resize(image, None, fx = scale_M, fy = scale_M, interpolation = cv2.INTER_AREA)
return image[0:64, 0:64, :]
#Rotating image by an angle
def rotate_image(image, rot_range = 2):
rows, cols = image.shape[:2]
rot = np.random.uniform(rot_range)- rot_range/2
rot_M = cv2.getRotationMatrix2D((cols/2, rows/2), rot, 1)
return cv2.warpAffine(image, rot_M, (cols,rows))
#Correcting deformations that occur with non-ideal camera angles
def affine_image(image, affine_range = 2):
#Affine Transform
rows, cols = image.shape[:2]
pts1 = np.float32([[5, 5],[10, 5],[5, 10]])
pt1 = 5 + affine_range * np.random.uniform() - affine_range/2
pt2 = 10 + affine_range * np.random.uniform() - affine_range/2
pts2 = np.float32([[pt1, 5],[pt2, pt1],[5, pt2]])
affine_M = cv2.getAffineTransform(pts1, pts2)
return cv2.warpAffine(image, affine_M, (cols,rows))
#Applying the previous functions
def transform_image(image, pos_range = 2, rot_range = 2, scale = [1, 1.1], affine_range = 2):
image = translate_image(image, pos_range=5)
image = rotate_image(image, rot_range = 5)
image = scale_image(image, scale = [1.2, 1.4, 1.0])
is_blurred = random.sample([0, 1], 1)
if is_blurred[0]:
image = cv2.GaussianBlur(image,(3,3),0)
return image
def convert_color(img, conv='RGB2YCrCb'):
#Converting the image from one color space to another
if conv == 'RGB2YCrCb':
return cv2.cvtColor(img, cv2.COLOR_RGB2YCrCb)
if conv == 'BGR2YCrCb':
return cv2.cvtColor(img, cv2.COLOR_BGR2YCrCb)
if conv == 'RGB2LUV':
return cv2.cvtColor(img, cv2.COLOR_RGB2LUV)
if conv == 'RGB2HLS':
return cv2.cvtColor(img, cv2.COLOR_RGB2HLS)
if conv == 'RGB2HSV':
return cv2.cvtColor(img, cv2.COLOR_RGB2HSV)
if conv == 'Gray':
return cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
if conv == 'RGB2YUV':
return cv2.cvtColor(img, cv2.COLOR_RGB2YUV)
#Extracting hog features from an image
def get_hog_features(img, orient, pix_per_cell, cell_per_block,
vis=False, feature_vec=True):
#Return 2 outputs if vis=true
if vis == True:
features, hog_image = hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
cells_per_block=(cell_per_block, cell_per_block), transform_sqrt=True,
visualize=vis, feature_vector=feature_vec)
return features, hog_image
# Return one output
else:
features = hog(img, orientations=orient, pixels_per_cell=(pix_per_cell, pix_per_cell),
cells_per_block=(cell_per_block, cell_per_block), transform_sqrt=True,
visualize=vis, feature_vector=feature_vec)
return features
# function to flatten 2D arrays to 1D
def bin_spatial(img, size=(16, 16)):
color1 = cv2.resize(img[:,:,0], size).ravel()
color2 = cv2.resize(img[:,:,1], size).ravel()
color3 = cv2.resize(img[:,:,2], size).ravel()
return np.hstack((color1, color2, color3))
# Define a function to compute color histogram features
def color_hist(img, nbins=32, bins_range=(0, 256)):
# Compute the histogram of the color channels separately
channel1_hist = np.histogram(img[:,:,0], bins=nbins, range=bins_range)
channel2_hist = np.histogram(img[:,:,1], bins=nbins, range=bins_range)
channel3_hist = np.histogram(img[:,:,2], bins=nbins, range=bins_range)
# Concatenate the histograms into a single feature vector
hist_features = np.concatenate((channel1_hist[0], channel2_hist[0], channel3_hist[0]))
# Return the individual histograms, bin_centers and feature vector
return hist_features
# Define a function to extract features from a list of images
def extract_features(imgs, cspace='RGB', spatial_size=(32, 32),
hist_bins=32, orient=9,
pix_per_cell=8, cell_per_block=2, hog_channel='ALL',
spatial_feat=False, hist_feat=False, hog_feat=True):
# Create a list to append feature vectors to
features = []
# Iterate through the list of images
for file in imgs:
file_features = []
# Read in each one by one
image = mpimg.imread(file)
image = (image * 255).astype(np.uint8)
# apply color conversion if other than 'RGB'
if cspace != 'RGB':
if cspace == 'HSV':
feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
elif cspace == 'LUV':
feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2LUV)
elif cspace == 'HLS':
feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2HLS)
elif cspace == 'YUV':
feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YUV)
elif cspace == 'YCrCb':
feature_image = cv2.cvtColor(image, cv2.COLOR_RGB2YCrCb)
else: feature_image = np.copy(image)
if spatial_feat == True:
spatial_features = bin_spatial(feature_image, size=spatial_size)
file_features.append(spatial_features)
if hist_feat == True:
# Apply color_hist()
hist_features = color_hist(feature_image, nbins=hist_bins)
file_features.append(hist_features)
if hog_feat == True:
# Call get_hog_features() with vis=False, feature_vec=True
hog_features = []
for channel in range(feature_image.shape[2]):
hog_features.append(get_hog_features(feature_image[:,:,channel],
orient, pix_per_cell, cell_per_block,
vis=False, feature_vec=True))
hog_features = np.ravel(hog_features)
# Append the new feature vector to the features list
file_features.append(hog_features)
features.append(np.concatenate(file_features))
# Return list of feature vectors
return features
#Training the classifier using SVC
colorspace = 'YUV' # Can be RGB, HSV, LUV, HLS, YUV, YCrCb
orient = 9
pix_per_cell = 8
cell_per_block = 2
hog_channel = 'ALL' # Can be 0, 1, 2, or "ALL"
spatial_size=(32, 32)
hist_bins=32
t=time.time()
#Extracting car features and non-car features
car_features = extract_features(cars, cspace=colorspace, orient=orient,
pix_per_cell=pix_per_cell, cell_per_block=cell_per_block,
hog_channel=hog_channel, hist_bins=hist_bins)
notcar_features = extract_features(notcars, cspace=colorspace, orient=orient,
pix_per_cell=pix_per_cell, cell_per_block=cell_per_block,
hog_channel=hog_channel, hist_bins=hist_bins)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to extract HOG features...')
# Create an array stack of feature vectors
X = np.vstack((car_features, notcar_features)).astype(np.float64)
print(X.shape)
# Fit a per-column scaler
X_scaler = StandardScaler().fit(X)
# Apply the scaler to X
scaled_X = X_scaler.transform(X)
# Define the labels vector
y = np.hstack((np.ones(len(car_features)), np.zeros(len(notcar_features))))
print(len(y))
# Split up data into randomized training and test sets
rand_state = np.random.randint(0, 100)
X_train, X_test, y_train, y_test = train_test_split(
scaled_X, y, test_size=0.15, random_state=rand_state)
print('Using:',orient,'orientations',pix_per_cell,
'pixels per cell and', cell_per_block,'cells per block')
print('Feature vector length:', len(X_train[0]))
# Use a linear SVC X_scaler
svc = LinearSVC()
# Check the training time for the SVC
t=time.time()
svc.fit(X_train, y_train)
t2 = time.time()
print(round(t2-t, 2), 'Seconds to train SVC...')
# Check the score of the SVC
print('Test Accuracy of SVC = ', round(svc.score(X_test, y_test), 4))
# Check the prediction time for a single sample
t=time.time()
n_predict = 10
print('My SVC predicts: ', svc.predict(X_test[0:n_predict]))
print('For these',n_predict, 'labels: ', y_test[0:n_predict])
t2 = time.time()
print(round(t2-t, 5), 'Seconds to predict', n_predict,'labels with SVC')
#Read cars and not-cars images
#Data folders
test_images_dir = os.path.join('..', 'test_images')
# images are divided up into vehicles and non-vehicles
test_images = []
images = glob.glob(test_images_dir + '/*.jpg')
for image in images:
test_images.append(mpimg.imread(image))
# Define a single function that can extract features using hog sub-sampling and make predictions
def find_cars(img, ystart, ystop, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block, spatial_size, hist_bins, vis_bboxes = False):
draw_img = np.copy(img)
xstart = int(img.shape[1]/5)
xstop = img.shape[1]
img_tosearch = img[ystart:ystop, xstart:xstop,:]
ctrans_tosearch = convert_color(img_tosearch, conv='RGB2YUV')
if scale != 1:
imshape = ctrans_tosearch.shape
ctrans_tosearch = cv2.resize(ctrans_tosearch, (np.int(imshape[1]/scale), np.int(imshape[0]/scale)))
ch1 = ctrans_tosearch[:,:,0]
ch2 = ctrans_tosearch[:,:,1]
ch3 = ctrans_tosearch[:,:,2]
# Define blocks and steps as above
nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1
nfeat_per_block = orient*cell_per_block**2
# 64 was the orginal sampling rate, with 8 cells and 8 pix per cell
window = 64
nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
cells_per_step = 2 # Instead of overlap, define how many cells to step
nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
nysteps = (nyblocks - nblocks_per_window) // cells_per_step
# Compute individual channel HOG features for the entire image
hog1 = get_hog_features(ch1, orient, pix_per_cell, cell_per_block, feature_vec=False)
hog2 = get_hog_features(ch2, orient, pix_per_cell, cell_per_block, feature_vec=False)
hog3 = get_hog_features(ch3, orient, pix_per_cell, cell_per_block, feature_vec=False)
rectangles = []
for xb in range(nxsteps):
for yb in range(nysteps):
ypos = yb*cells_per_step
xpos = xb*cells_per_step
# Extract HOG for this patch
hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3)).reshape(1, -1)
xleft = xpos*pix_per_cell
ytop = ypos*pix_per_cell
test_features = X_scaler.transform(hog_features)
test_prediction = svc.predict(test_features)
if test_prediction == 1 or vis_bboxes == True:
xbox_left = np.int(xleft*scale)
ytop_draw = np.int(ytop*scale)
win_draw = np.int(window*scale)
rectangles.append(((xbox_left+xstart, ytop_draw+ystart),(xbox_left+win_draw+xstart,ytop_draw+win_draw+ystart)))
return rectangles
def get_rectangles(image, scales = [1, 1.5, 2, 2.5, 3],
ystarts = [400, 400, 450, 450, 460],
ystops = [528, 550, 620, 650, 700]):
out_rectangles = []
for scale, ystart, ystop in zip(scales, ystarts, ystops):
rectangles = find_cars(image, ystart, ystop, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block, spatial_size, hist_bins)
if len(rectangles) > 0:
out_rectangles.append(rectangles)
out_rectangles = [item for sublist in out_rectangles for item in sublist]
return out_rectangles
def add_heat(heatmap, bbox_list):
# Iterate through list of bboxes
for box in bbox_list:
# Add += 1 for all pixels inside each bbox
# Assuming each "box" takes the form ((x1, y1), (x2, y2))
heatmap[box[0][1]:box[1][1], box[0][0]:box[1][0]] += 1
# Return updated heatmap
return heatmap
def apply_threshold(heatmap, threshold):
heatmap[heatmap <= threshold] = 0
return heatmap
def draw_labeled_bboxes(img, labels):
# Iterate through all detected cars
img_copy = np.copy(img)
result_rectangles = []
for car_number in range(1, labels[1]+1):
# Find pixels with each car_number label value
nonzero = (labels[0] == car_number).nonzero()
# Identify x and y values of those pixels
nonzeroy = np.array(nonzero[0])
nonzerox = np.array(nonzero[1])
# Define a bounding box based on min/max x and y
bbox = ((np.min(nonzerox), np.min(nonzeroy)), (np.max(nonzerox), np.max(nonzeroy)))
area = (bbox[1][1] - bbox[0][1]) * (bbox[1][0] - bbox[0][0])
if area > 40 * 40:
result_rectangles.append(bbox)
# Draw the box on the image
cv2.rectangle(img_copy, bbox[0], bbox[1], (0,0,255), 6)
# Return the image
return result_rectangles, img_copy
def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
# copy the image
imcopy = np.copy(img)
random_color = False
# Iterate through the bounding boxes
for bbox in bboxes:
if color == 'random' or random_color:
color = (np.random.randint(0,255), np.random.randint(0,255), np.random.randint(0,255))
random_color = True
# Draw a rectangle given bbox coordinates
cv2.rectangle(imcopy, bbox[0], bbox[1], color, thick)
# Return the image copy with boxes drawn
return imcopy
def visualize_bboxes(image, scales = [1, 1.5, 2, 2.5, 3],
ystarts = [400, 400, 450, 450, 460],
ystops = [528, 550, 620, 650, 700]):
out_rectangles = []
for scale, ystart, ystop in zip(scales, ystarts, ystops):
rectangles = find_cars(image, ystart, ystop, scale, svc, X_scaler, orient, pix_per_cell, cell_per_block, spatial_size, hist_bins, vis_bboxes = True)
if len(rectangles) > 0:
out_rectangles.append(rectangles)
out_rectangles = [item for sublist in out_rectangles for item in sublist]
plt.figure(figsize=(20,10))
plt.imshow(draw_boxes(image, out_rectangles, color='random', thick=3))
from scipy.ndimage.measurements import label
result_images = []
result_boxes = []
heatmap_images = []
result_img_all_boxes = []
for test_image in test_images:
rectangles = get_rectangles(test_image)
result_img_all_boxes.append(draw_boxes(test_image, rectangles, color='random', thick=3))
heatmap_image = np.zeros_like(test_image[:, :, 0])
heatmap_image = add_heat(heatmap_image, rectangles)
heatmap_images.append(heatmap_image)
heatmap_image = apply_threshold(heatmap_image, 2)
labels = label(heatmap_image)
rectangles, result_image = draw_labeled_bboxes(test_image, labels)
result_boxes.append(rectangles)
result_images.append(result_image)
visualize_images(result_img_all_boxes, 2, "drawing boxes")
visualize_images(result_images, 2, "detection")
visualize_images(heatmap_images, 2, "Heatmap", cmap="hot")
class DetectionInfo():
def __init__(self):
self.max_size = 10
self.old_bboxes = queue.Queue(self.max_size)
self.heatmap = np.zeros_like(test_images[0][:, :, 0])
def get_heatmap(self):
self.heatmap = np.zeros_like(test_images[0][:, :, 0])
if self.old_bboxes.qsize() == self.max_size:
for bboxes in list(self.old_bboxes.queue):
self.heatmap = add_heat(self.heatmap, bboxes)
self.heatmap = apply_threshold(self.heatmap, 20)
return self.heatmap
def get_labels(self):
return label(self.get_heatmap())
def add_bboxes(self, bboxes):
if len(bboxes) < 1:
return
if self.old_bboxes.qsize() == self.max_size:
self.old_bboxes.get()
self.old_bboxes.put(bboxes)
def find_vehicles_video(image, detection_info):
bboxes = get_rectangles(image)
detection_info.add_bboxes(bboxes)
labels = detection_info.get_labels()
if len(labels) == 0:
result_image = image
else:
bboxes, result_image = draw_labeled_bboxes(image,labels)
return result_image
def find_vehicles_image(image):
rectangles = get_rectangles(image)
if not rectangles or len(rectangles) == 0:
return image
heatmap_image = np.zeros_like(image[:, :, 0])
heatmap_image = add_heat(heatmap_image, rectangles)
heatmap_image = apply_threshold(heatmap_image, 2)
labels = label(heatmap_image)
rectangles, result = draw_labeled_bboxes(image, labels)
return result
def find_vehicles(image, input_type, detection_info=None):
if input_type == 'image':
return find_vehicles_image(image)
elif input_type == 'video':
return find_vehicles_video(image, detection_info)
else:
raise RuntimeError("Input type must be either image or video.")
input_image_path = os.path.join('..', 'test_images', 'test5.jpg')
input_image = cv2.imread(input_image_path)
plt.imshow(find_vehicles(input_image, 'image'))
detection_info = DetectionInfo()
detection_info.old_heatmap = np.zeros_like(test_images[0][:, :, 0])
project_video_path = os.path.join('..', 'project_video.mp4')
project_video_output = os.path.join('..', 'output_video.mp4')
project_video = VideoFileClip(project_video_path)
white_clip = project_video.fl_image(lambda image: find_vehicles(image, 'video', detection_info))
white_clip.write_videofile(project_video_output, audio=False)